import pandas as pd
import wikidata_plain_sparql as wikidata
from bokeh.palettes import turbo
from bokeh.plotting import figure, output_notebook, show, gridplot
# set bokeh output mode to notebook
output_notebook()
from helper import get_jhu_cached, create_grid
# Days to load: the range starts on 2020-05-31 (one day before June 1st) and
# its final entry is sliced off so that it ends with yesterday.
all_dates = pd.date_range(start='2020-05-31', end='today')[:-1]

# Read the cached JHU report of every day and tag each of its rows with that day.
all_data = [
    pd.read_csv(get_jhu_cached(day)).assign(Date=day)
    for day in all_dates
]

# Stack the daily reports into one frame.
raw_data = pd.concat(all_data)
# Sum confirmed cases and deaths over all rows that share a country and a
# date, producing one row per (country, date) pair.
country_date_groups = raw_data.groupby(['Country_Region', 'Date'])
updates_per_country = country_date_groups.agg(
    Confirmed=pd.NamedAgg(column='Confirmed', aggfunc='sum'),
    Deaths=pd.NamedAgg(column='Deaths', aggfunc='sum'),
).reset_index()

# Sorted array of every distinct country name.
all_countries = updates_per_country['Country_Region'].unique()
all_countries.sort()
# Derive day-over-day changes from the cumulative counts, separately for each
# country. diff() compares every row with the previous row of the same country
# (in the frame's existing row order), so the first row of each country has no
# predecessor and stays NaN.
country_key = updates_per_country['Country_Region']
updates_per_country['New_Cases'] = (
    updates_per_country['Confirmed'].groupby(country_key).diff()
)
# Running sum of the daily changes per country; cumsum() leaves the leading
# NaN in place and accumulates from the second row onwards.
updates_per_country['Total_Cases'] = (
    updates_per_country['New_Cases'].groupby(country_key).cumsum()
)
updates_per_country['New_Deaths'] = (
    updates_per_country['Deaths'].groupby(country_key).diff()
)

# Keep only rows dated 2020-06-01 or later; earlier rows were needed solely as
# the reference point for the first difference.
updates_per_country = updates_per_country[updates_per_country['Date'] >= '2020-06-01']

# Compact view holding just the columns meant for display.
worldwide_pretty = updates_per_country.loc[
    :, ['Date', 'Country_Region', 'New_Cases', 'Total_Cases', 'New_Deaths']
]
# Plot the daily new cases, one line per country.
new_cases_graph = figure(
    title="New COVID-19 cases per country",
    y_axis_label='new cases',
    x_axis_type='datetime',
    sizing_mode='stretch_width',
)

# One palette entry per country, paired with the countries in order.
palette = turbo(all_countries.size)
for line_color, country in zip(palette, all_countries):
    country_rows = updates_per_country[updates_per_country['Country_Region'] == country]
    new_cases_graph.line(
        country_rows['Date'],
        country_rows['New_Cases'],
        line_color=line_color,
        legend_label=country,
        line_width=2,
    )

show(new_cases_graph)
# Plot the accumulated cases, one line per country.
total_cases_graph = figure(
    title="Total COVID-19 cases per country since 1st of june",
    y_axis_label='total cases',
    x_axis_type='datetime',
    sizing_mode='stretch_width',
)

# The n-th country is drawn in the n-th palette colour.
palette = turbo(all_countries.size)
for position, country in enumerate(all_countries):
    is_country = updates_per_country['Country_Region'] == country
    country_rows = updates_per_country.loc[is_country]
    total_cases_graph.line(
        country_rows['Date'],
        country_rows['Total_Cases'],
        line_color=palette[position],
        legend_label=country,
        line_width=2,
    )

show(total_cases_graph)
# SPARQL query sent to WikiData. It selects ?shortCode, ?population and
# ?canton for every item matched by the wdt:P31 / wdt:P300 patterns, and the
# FILTER NOT EXISTS clause discards a population statement whenever another
# one with a later ?population_date_ exists for the same item.
# NOTE(review): presumably Q23058 is the WikiData class for Swiss cantons - confirm.
population_query = '''
SELECT ?shortCode ?population ?canton WHERE {
?canton wdt:P31 wd:Q23058.
?canton wdt:P300 ?shortCode.
OPTIONAL {
?canton p:P1082 ?population_stmt.
?population_stmt ps:P1082 ?population.
?population_stmt pq:P585 ?population_date.
}
FILTER NOT EXISTS {
?canton p:P1082/pq:P585 ?population_date_.
FILTER (?population_date_ > ?population_date)
}
}
ORDER BY ?shortCode
'''

# Run the query and index the result by short code for direct lookups.
canton_data = wikidata.query(population_query).set_index('shortCode')
# Download the openZH case-count CSV.
raw_data = pd.read_csv('https://raw.githubusercontent.com/openZH/covid_19/master/COVID19_Fallzahlen_CH_total_v2.csv')

# Parse the date column so it can be compared and plotted as datetimes.
raw_data['date'] = pd.to_datetime(raw_data['date'])

# Drop the rows whose abbreviation is 'FL'.
swiss = raw_data[raw_data['abbreviation_canton_and_fl'] != 'FL']

# Keep rows from 2020-05-31 onwards, i.e. one day before June 1st.
swiss = swiss[swiss['date'] >= '2020-05-31']

# Keep only rows that report a cumulative confirmed count. notna() states the
# intent directly instead of negating isna() with a unary minus, which only
# works because pandas special-cases boolean data.
conf_cases = swiss.loc[swiss['ncumul_conf'].notna()].copy()

# Sorted array of every distinct canton abbreviation.
all_cantons = conf_cases['abbreviation_canton_and_fl'].unique()
all_cantons.sort()
# Derive day-over-day changes and running totals per canton from the
# cumulative counts, in absolute numbers and per 100,000 inhabitants.
canton_key = conf_cases['abbreviation_canton_and_fl']

# Look the population of every canton up once; canton_data is indexed by the
# abbreviation prefixed with 'CH-'.
population_by_canton = {
    canton: int(canton_data.at['CH-' + canton, 'population'])
    for canton in all_cantons
}
population = canton_key.map(population_by_canton)

# diff() compares each row with the previous row of the same canton (in the
# frame's existing row order); the first row of a canton has no predecessor
# and yields NaN.
daily_change = conf_cases['ncumul_conf'].groupby(canton_key).diff()
# Running sum of the daily changes per canton; the leading NaN stays NaN.
running_total = daily_change.groupby(canton_key).cumsum()

# 'new_cases' is an integer column in which the first row of each canton is 0
# rather than NaN; the derived columns below keep NaN for that row.
conf_cases['new_cases'] = daily_change.fillna(0).astype(int)
conf_cases['new_cases_relative'] = daily_change / population * 100000
conf_cases['total_cases'] = running_total
conf_cases['total_cases_relative'] = running_total / population * 100000
conf_cases['new_deaths'] = conf_cases['ncumul_deceased'].groupby(canton_key).diff()

# Keep only rows dated 2020-06-01 or later; earlier rows were needed solely as
# the reference point for the first difference.
conf_cases = conf_cases[conf_cases['date'] >= '2020-06-01']

# Compact view holding just the columns meant for display.
swiss_pretty = conf_cases.loc[
    :, ['date', 'abbreviation_canton_and_fl', 'new_cases', 'total_cases', 'new_deaths']
]
# One chart per canton showing the relative new cases together with their
# 7-row rolling mean (red). All charts share the same y-range so they can be
# compared side by side.
graphs = []
max_new_cases = conf_cases['new_cases_relative'].max()
for canton in all_cantons:
    canton_rows = conf_cases[conf_cases['abbreviation_canton_and_fl'] == canton]
    daily_relative = canton_rows['new_cases_relative']
    rolling_mean = daily_relative.rolling(window=7).mean()

    canton_figure = figure(
        title=canton,
        y_axis_label='new cases',
        y_range=[0, max_new_cases],
        x_axis_type='datetime',
    )
    canton_figure.line(canton_rows['date'], daily_relative, line_width=1)
    canton_figure.line(canton_rows['date'], rolling_mean, line_color='red', line_width=1)
    graphs.append(canton_figure)

show(create_grid(graphs, sizing_mode='scale_width'))
# One chart per canton showing the relative accumulated cases. All charts
# share the same y-range so they can be compared side by side.
graphs = []
max_total_cases = conf_cases['total_cases_relative'].max()
for canton in all_cantons:
    is_canton = conf_cases['abbreviation_canton_and_fl'] == canton
    canton_rows = conf_cases.loc[is_canton]

    canton_figure = figure(
        title=canton,
        y_axis_label='total cases',
        y_range=[0, max_total_cases],
        x_axis_type='datetime',
    )
    canton_figure.line(canton_rows['date'], canton_rows['total_cases_relative'], line_width=1)
    graphs.append(canton_figure)

show(create_grid(graphs, sizing_mode='scale_width'))
# Display the per-country table (date, country, new cases, total cases, new deaths).
worldwide_pretty
| | Date | Country_Region | New_Cases | Total_Cases | New_Deaths |
|---|---|---|---|---|---|
| 1 | 2020-06-01 | Afghanistan | 545.0 | 545.0 | 8.0 |
| 2 | 2020-06-02 | Afghanistan | 759.0 | 1304.0 | 8.0 |
| 3 | 2020-06-03 | Afghanistan | 758.0 | 2062.0 | 24.0 |
| 4 | 2020-06-04 | Afghanistan | 787.0 | 2849.0 | 6.0 |
| 5 | 2020-06-05 | Afghanistan | 915.0 | 3764.0 | 9.0 |
| ... | ... | ... | ... | ... | ... |
| 61329 | 2021-04-14 | Zimbabwe | 39.0 | 37191.0 | 5.0 |
| 61330 | 2021-04-15 | Zimbabwe | 53.0 | 37244.0 | 2.0 |
| 61331 | 2021-04-16 | Zimbabwe | 112.0 | 37356.0 | 1.0 |
| 61332 | 2021-04-17 | Zimbabwe | 165.0 | 37521.0 | 1.0 |
| 61333 | 2021-04-18 | Zimbabwe | 52.0 | 37573.0 | 1.0 |
61146 rows × 5 columns
# Display the per-canton table (date, canton, new cases, total cases, new deaths).
swiss_pretty
| | date | abbreviation_canton_and_fl | new_cases | total_cases | new_deaths |
|---|---|---|---|---|---|
| 2346 | 2020-06-01 | BL | 1 | 1.0 | 0.0 |
| 2347 | 2020-06-01 | FR | 0 | 0.0 | 0.0 |
| 2348 | 2020-06-01 | GE | 1 | 1.0 | 0.0 |
| 2349 | 2020-06-01 | GR | 0 | 0.0 | 0.0 |
| 2350 | 2020-06-01 | JU | 0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... |
| 10397 | 2021-04-19 | SZ | 19 | 10371.0 | 2.0 |
| 10398 | 2021-04-19 | SH | 45 | 4122.0 | 0.0 |
| 10399 | 2021-04-19 | TG | 4 | 16701.0 | 0.0 |
| 10400 | 2021-04-19 | BS | 19 | 10410.0 | 0.0 |
| 10401 | 2021-04-19 | AI | 5 | 928.0 | 0.0 |
7553 rows × 5 columns